library(data.table)
library(lubridate)
library(stringi)
library(stringr)

set.seed(42)

# Declare working directory beforehand in an environment variable
# IMPERIAL_LEGAL_POLITICS_REPLICATION_PATH = "path_to_your_folder"
# with the aid of usethis::edit_r_environ()
# Restart R session for the changes to take effect
path <- Sys.getenv("IMPERIAL_LEGAL_POLITICS_REPLICATION_PATH")
setwd(path)

# Load an object with case-instance-side-outcome-level data
# created by analysis/1b_prepare_crimea_data.r and
# 1c_prepare_krasnodar_data.r
load("data/crimea_case_outcomes_side_instance.rdata")
load("data/krasnodar_case_outcomes_side_instance.rdata")

##############################
# Aggregate data at case level (with first instance taking precedence)

cols_to_keep <- c("caseid", "courtTag", "caseTypeCode", "caseCategoryUnified", "caseCategoryRedux", "caseCategory_government", "caseCategory_private", "year", "year_factor", "petty_case", "countDocumentsByCourt", "lncountDocumentsByCourt", "days_elapsed", "claimSum_deflated", "claimSum_decile", "recoverySum_deflated", "was_appealed", "registrationDate", "date", "dispute_type", "local_entities_only", names(crimea_case_outcomes_side_instance)[grepl("judge|government|agency|local_entity", names(crimea_case_outcomes_side_instance))])

crimea_cases <- crimea_case_outcomes_side_instance[, lapply(.SD, function(x) x[[1]]), by = "caseid", .SDcols = cols_to_keep ]
crimea_cases[, c("government", "federal_agency", "regional_agency", "municipal_agency", "local_entity") := NULL ]

cols_to_keep <- c("caseid", "courtTag", "caseTypeCode", "caseCategoryUnified", "caseCategoryRedux", "caseCategory_government", "caseCategory_private", "year", "year_factor", "petty_case", "countDocumentsByCourt", "lncountDocumentsByCourt", "days_elapsed", "claimSum_deflated", "claimSum_decile", "recoverySum_deflated", "was_appealed", "registrationDate", "date", "dispute_type", "local_entities_only", names(krasnodar_case_outcomes_side_instance)[grepl("judge|government|agency|local_entity", names(krasnodar_case_outcomes_side_instance))])

krasnodar_cases <- krasnodar_case_outcomes_side_instance[, lapply(.SD, function(x) x[[1]]), by = "caseid", .SDcols = cols_to_keep ]
krasnodar_cases[, c("government", "federal_agency", "regional_agency", "municipal_agency", "local_entity") := NULL ]

# Generate case months
crimea_cases[, registrationmonth := floor_date(registrationDate, "month")]
krasnodar_cases[, registrationmonth := floor_date(registrationDate, "month")]

##############################
# Function to sample cases and compute
# simulated and empirical IQRs of judge-level
# means of case observables
#
# INPUTS:	casedata - data.table where each row is one case
#			id — character object with variable name for case
#				id in casedata
#			court — ... for court id in casedata
#			judge — ... for judge id in casedata
#			category — ... for case category id in casedata
#			time — ... for time period id in casedata
#			observables — character with variable names of observables
#					 in casedata to compute judge means and IQRs
#			simulations — how many simulations to consider
#
# OUTPUTS: data.table where each row is observable and columns
#			are empirical and simulate IQRs of their judge means
#			and p-values from one-sided test
#
# USAGE: test_random_case_assignment(arbitrazh, id = "caseid",
#									court = "courtag",
#									judge = "presiding_judge",
#									category = "caseCategoryUnified",
#									time = "registrationMonth",
#									observables = c("was_appealed", "claimSum")
#									simulations = 1000)
									
test_random_case_assignment <- function(casedata, id, court, judge, category, time, observables, simulations = 100) {
	
	# debug: casedata <- crimea_cases[ involved_government == 0 & local_entities_only == 0 ]; id <- "caseid"; court <- "courtTag"; judge <- "presiding_judge"; category <- "caseCategoryRedux"; time <- "registrationmonth"; observables <- c("petty_case", "countDocumentsByCourt", "days_elapsed", "claimSum_deflated", "was_appealed", "government_plaintiff", "local_entity_plaintiff", "government_win_first", "local_entity_win_first"); simulations <- 100

	# Create an indicator for cell (court*category*month interaction)
	casedata[, cell := paste0(get(court), "@", get(category), "@", get(time))]
	
	# Remove observations with less than 10 cases per cell
	cases_per_cell <- casedata[, list(cases = uniqueN(get(id))), by = "cell"]
	casedata <- casedata[ cell %in% cases_per_cell[ cases >= 10]$cell ]
	
	# Create a progress bar to report on simulation progress
	message(paste0("Iterating through ", simulations, " samples:"))
	iterations_progess_bar <- txtProgressBar(min = 0, max = simulations, style = 3)
	
	# Init a list to store judge-level means from simulations
	simulated_judge_means_list <- list()
	
	for(i in 1:simulations) {

		# Draw a random case id within cell
		casedata[, id_sampled := sample(get(id), replace = T), by = "cell"]
		
		# Add case characteristics for sampled cases
		sampled_casedata <- casedata[, c(judge, "cell", id, "id_sampled"), with = F ]
		sampled_casedata <- merge(sampled_casedata, casedata[, c(id, observables), with = F], by.x = "id_sampled", by.y = id, sort = F)
		
		# Compute judge-level means of observables
		# and store the resulting data.table in
		# a numbered list 
		simulated_judge_means_list[[i]] <- sampled_casedata[, lapply(.SD, mean, na.rm = T), by = judge, .SDcols = observables ]
				
		# Update the progress bar
		setTxtProgressBar(iterations_progess_bar, i)
		
	}
	
	# List with data tables of per-observable judge means
	# to a data.table with simulation results
	simulated_judge_means <- rbindlist(simulated_judge_means_list, idcol = "simulation")
	
	# Compute IQRs of judge-averaged observables per simulation
	simulation_iqrs <- simulated_judge_means[, lapply(.SD, IQR, na.rm = T), by = "simulation", .SDcols = observables]
	
	# Means/SDs of those IQRs
	simulation_iqr_means <- simulation_iqrs[, lapply(.SD, mean, na.rm = T), .SDcols = observables]
	simulation_iqr_means[, variable := "mean_IQR"]
	
	simulation_iqr_sds <- simulation_iqrs[, lapply(.SD, sd, na.rm = T), .SDcols = observables]
	simulation_iqr_sds[, variable := "sd_IQR"]

	# Wide to long
	simulation_iqr_means <- melt(simulation_iqr_means, id.vars = "variable", variable.name = "observable", value.name = "mean_IQR")
	simulation_iqr_sds <- melt(simulation_iqr_sds, id.vars = "variable", variable.name = "observable", value.name = "sd_IQR")

	# Object with simulation results
	simulation_results <- merge(simulation_iqr_means[, -"variable"], simulation_iqr_sds[, -"variable"], by = "observable")
	
	# Compute empirical IQRs as well
	empirical_judge_means <- casedata[, lapply(.SD, mean, na.rm = T), by = judge, .SDcols = observables ]
	empirical_iqrs <- empirical_judge_means[, lapply(.SD, IQR, na.rm = T), .SDcols = observables]

	# Object to store empirical p-values
	empirical_pvalues <- data.table()
	
	# Compute them
	for(obs in observables) {
	
		# Which proportion of a sample is less than the empirical cutoff
		prop_sample_less <- ecdf(simulation_iqrs[[obs]])(empirical_iqrs[[obs]])
		
		# p-value for one-tailed test
		# Note that for the (more conventional)
		# two-tailed test multiply p-value by two
		pvalue_res <- ifelse(prop_sample_less > 0.5, 1 - prop_sample_less, prop_sample_less)
		
		empirical_pvalues <- rbind(empirical_pvalues, data.table(observable = obs, pvalue = pvalue_res))
		
	}
	
	# Organize final result
	empirical_iqrs[, variable := "empirical" ]
	empirical_iqrs <- melt(empirical_iqrs, id.vars = "variable", variable.name = "observable", value.name = "empirical_IQR")
	empirical_iqrs[, variable := NULL ]
	
	results <- merge(empirical_iqrs, simulation_results, by = "observable")
	results <- merge(results, empirical_pvalues, by = "observable")
	
	return(results)
	
}

##############################
# Perform simulations

# Crimea
## Government vs. private disputes, first instance
crimea_random_government <- test_random_case_assignment(casedata = crimea_cases[ involved_government == 1 ], id = "caseid", court = "courtTag", judge = "presiding_judge", category = "caseCategoryRedux", time = "registrationmonth", observables = c("petty_case", "countDocumentsByCourt", "days_elapsed", "claimSum_deflated", "was_appealed", "government_plaintiff", "government_win_first"), simulations = 3000)

## Private vs. private disputes where one of parties is outside Crimea
crimea_random_local_entity <- test_random_case_assignment(casedata = crimea_cases[ involved_government == 0 & local_entities_only == 0 ], id = "caseid", court = "courtTag", judge = "presiding_judge", category = "caseCategoryRedux", time = "registrationmonth", observables = c("petty_case", "countDocumentsByCourt", "days_elapsed", "claimSum_deflated", "was_appealed", "local_entity_plaintiff", "local_entity_win_first"), simulations = 3000)

# Krasnodar
## Government vs. private disputes, first instance
krasnodar_random_government <- test_random_case_assignment(casedata = krasnodar_cases[ involved_government == 1 ], id = "caseid", court = "courtTag", judge = "presiding_judge", category = "caseCategoryRedux", time = "registrationmonth", observables = c("petty_case", "countDocumentsByCourt", "days_elapsed", "claimSum_deflated", "was_appealed", "government_plaintiff", "government_win_first"), simulations = 3000)

## Private vs. private disputes where one of parties is outside krasnodar
krasnodar_random_local_entity <- test_random_case_assignment(casedata = krasnodar_cases[ involved_government == 0 & local_entities_only == 0 ], id = "caseid", court = "courtTag", judge = "presiding_judge", category = "caseCategoryRedux", time = "registrationmonth", observables = c("petty_case", "countDocumentsByCourt", "days_elapsed", "claimSum_deflated", "was_appealed", "local_entity_plaintiff", "local_entity_win_first"), simulations = 3000)

# Proper row order
crimea_random_government <- crimea_random_government[ match(c("government_win_first", "government_plaintiff", "petty_case", "claimSum_deflated", "days_elapsed", "countDocumentsByCourt", "was_appealed"), observable)]
krasnodar_random_government <- krasnodar_random_government[ match(c("government_win_first", "government_plaintiff", "petty_case", "claimSum_deflated", "days_elapsed", "countDocumentsByCourt", "was_appealed"), observable)]

crimea_random_local_entity <- crimea_random_local_entity[ match(c("local_entity_win_first", "local_entity_plaintiff", "petty_case", "claimSum_deflated", "days_elapsed", "countDocumentsByCourt", "was_appealed"), observable)]
krasnodar_random_local_entity <- krasnodar_random_local_entity[ match(c("local_entity_win_first", "local_entity_plaintiff", "petty_case", "claimSum_deflated", "days_elapsed", "countDocumentsByCourt", "was_appealed"), observable)]

##############################
# TABLE S7. Random-assignment simulation results by case characteristics: Abrams, Bertrand, and Mullainathan 2012 test

# Export the tables
fwrite(crimea_random_government, file = "tables/tableS7_crimea_random_government.csv")
fwrite(crimea_random_local_entity, file = "tables/tableS7_crimea_random_local_entity.csv")

fwrite(krasnodar_random_government, file = "tables/tableS7_krasnodar_random_government.csv")
fwrite(krasnodar_random_local_entity, file = "tables/tableS7_krasnodar_random_local_entity.csv")
